// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

namespace threedgut {

/// Compile-time configuration constants.
///
/// Everything in here is a `static constexpr uint32_t`; the struct carries no
/// instance state and only serves as a named scope for the constants.
/// NOTE(review): this header uses `uint32_t` but does not include <cstdint>
/// itself, so it relies on the including translation unit to provide it.
struct GUTParameters {
    /// Tile / block geometry and the constants derived from it.
    struct Tiling {
        // Base block dimensions (presumably one thread per pixel of a tile - confirm against the kernels).
        static constexpr uint32_t BlockX = 16;
        static constexpr uint32_t BlockY = 16;

        // Sentinel with all 32 bits set. Written as a hex literal (same form as
        // WarpMask) rather than `-1U`, which negates an unsigned value and
        // triggers warnings such as MSVC C4146; the resulting value is identical.
        static constexpr uint32_t InvalidTileIdx = 0xFFFFFFFFU;

        static constexpr uint32_t BlockSize = BlockX * BlockY; // 256 with the current base dimensions
        static constexpr uint32_t WarpSize  = 32;
        static_assert(BlockSize % WarpSize == 0,
                      "BlockSize must be a multiple of WarpSize, otherwise NumWarps is truncated");
        static constexpr uint32_t NumWarps = BlockSize / WarpSize; // 8 with the current base dimensions
        static constexpr uint32_t WarpMask = 0xFFFFFFFFU;          // all 32 bits set

        // Fine-grained load balancing parameters - base dimensions
        static constexpr uint32_t VirtualTileX = 2; // virtual tile width in pixels
        static constexpr uint32_t VirtualTileY = 2; // virtual tile height in pixels
        static_assert(VirtualTileX != 0 && VirtualTileY != 0,
                      "virtual tile dimensions must be non-zero");
        static_assert(BlockX % VirtualTileX == 0,
                      "BlockX must be a multiple of VirtualTileX, otherwise VirtualTilesPerTileX is truncated");
        static_assert(BlockY % VirtualTileY == 0,
                      "BlockY must be a multiple of VirtualTileY, otherwise VirtualTilesPerTileY is truncated");

        // Derived constants from base dimensions
        static constexpr uint32_t VirtualTileSize      = VirtualTileX * VirtualTileY;                 // 4 pixels per virtual tile
        static constexpr uint32_t VirtualTilesPerTileX = BlockX / VirtualTileX;                       // 8 virtual tiles per row
        static constexpr uint32_t VirtualTilesPerTileY = BlockY / VirtualTileY;                       // 8 virtual tiles per column
        static constexpr uint32_t VirtualTilesPerTile  = VirtualTilesPerTileX * VirtualTilesPerTileY; // 64 total
        static constexpr uint32_t FineGrainedWarpsPerBlock   = VirtualTileSize;                       // 4 warps per block (1 per pixel)
        static constexpr uint32_t FineGrainedThreadsPerBlock = FineGrainedWarpsPerBlock * WarpSize;   // 128 threads
    };

    // Sentinel with all 32 bits set; same value as the previous `-1U` spelling.
    static constexpr uint32_t InvalidParticleIdx = 0xFFFFFFFFU;
};

} // namespace threedgut